Effect of cohesin looping on genome architecture, from the perspective of the nuclear lamina.
In a previous document, I classified LAD borders as 1) CTCF-bound or not 2) shared or unique. Here, I will visualize what the effect is of CTCF depletion on the various types of LAD borders. Also, the effect of the other depletions.
This is a variation on another document. Instead of selecting borders that do not overlap with active genes, here I specifically look at borders that do overlap.
Load (z-scale) DamID tracks and plot effect on different types of LAD borders.
Load the libraries and set the parameters.
# Load dependencies
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(GenomicRanges))
suppressPackageStartupMessages(library(rtracklayer))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(caTools))
suppressPackageStartupMessages(library(yaml))
suppressPackageStartupMessages(library(M3C))
suppressPackageStartupMessages(library(ggbeeswarm))
suppressPackageStartupMessages(library(broom))
suppressPackageStartupMessages(library(corrr))
bin.size <- "10kb"
damid.dir <- file.path("../results_NQ/normalized/", paste0("bin-", bin.size))
# Prepare output
output.dir <- "ts220113_effect_of_CTCF_depletion_on_LAD_borders_with_active_genes"
dir.create(output.dir, showWarnings = FALSE)
# Prepare input
input.dir <- "ts220113_CTCF_enrichment_at_LAD_borders"
metadata.borders <- readRDS(file.path(input.dir, "metadata.rds"))
LADs <- readRDS(file.path(input.dir, "LADs.rds"))
LAD.borders <- readRDS(file.path(input.dir, "LAD_borders.rds"))
CTCF.sites <- readRDS(file.path(input.dir, "CTCF_sites.rds"))
LADs <- readRDS(file.path(input.dir, "LADs_pA.rds"))
LAD.borders <- readRDS(file.path(input.dir, "LAD_borders_pA.rds"))
borders <- LAD.borders[["mESC_pA_PT"]]
borders$class <- "xxx"Prepare knitr output.
library(knitr)
opts_chunk$set(fig.width = 10, fig.height = 4, cache = T,
message = F, warning = F,
dev=c('png', 'pdf'), fig.path = file.path(output.dir, "figures/"))
pdf.options(useDingbats = FALSE)List functions.
LoadDamID <- function(metadata, damid.dir, column = "file") {
# Load data
tib <- purrr::reduce(lapply(1:nrow(metadata),
function(i) {
f <- metadata[i, ] %>% pull(column)
s <- as.character(metadata$sample)[i]
tmp <- read_tsv(file.path(damid.dir, f),
col_names = c("seqnames", "start", "end", s))
}),
full_join)
# Convert to GRanges
tib$start <- tib$start + 1
gr <- as(tib, "GRanges")
# Filter chromosomes
gr <- gr[seqnames(gr) %in% c(paste0("chr", 1:22), "chrX")]
gr
}
LoadDamIDCounts <- function(metadata, damid.dir.counts,
yaml = "../bin/snakemake/config.yaml") {
# List samples from yaml file
yaml_list <- read_yaml(yaml)
# Read lmnb1 counts
idx <- which(names(yaml_list$dam_controls) %in% unlist(yaml_list$replicates))
lmnb1_files <- names(yaml_list$dam_controls)[idx]
dam_files <- unlist(yaml_list$dam_controls)[idx]
# Prepare counts metadata
metadata_counts <- tibble(lmnb1 = lmnb1_files,
dam = dam_files) %>%
mutate(sample = str_remove(lmnb1, "pADamID_NQ_"),
sample = str_remove(sample, "pADamID_"),
sample = str_remove(sample, "-")) %>%
separate(sample, c("condition", "timepoint", "replicate"), remove = F) %>%
mutate(idx = match(paste0(condition, timepoint),
paste0(metadata$condition, metadata$timepoint)),
group = metadata$sample[idx],
lmnb1 = paste0(lmnb1, "-10kb.counts.txt.gz"),
dam = paste0(dam, "-10kb.counts.txt.gz")) %>%
arrange(idx, replicate)
# Read lmnb1 files
gr_lmnb1 <- LoadDamID(metadata_counts, damid.dir.counts, column = "lmnb1")
# CPM and mean of replicates
tib <- as_tibble(mcols(gr_lmnb1)) %>%
mutate_all(function(x) x / sum(x) * 1e6) %>%
add_column(bin = 1:nrow(.)) %>%
gather(key, value, -bin) %>%
mutate(sample = metadata_counts$group[match(key, metadata_counts$sample)]) %>%
group_by(bin, sample) %>%
summarise(mean = mean(value)) %>%
ungroup() %>%
spread(sample, mean) %>%
dplyr::select(-bin)
mcols(gr_lmnb1) <- tib
# Read dam files
gr_dam <- LoadDamID(metadata_counts, damid.dir.counts, column = "dam")
# CPM and mean of replicates
tib <- as_tibble(mcols(gr_dam)) %>%
mutate_all(function(x) x / sum(x) * 1e6) %>%
add_column(bin = 1:nrow(.)) %>%
gather(key, value, -bin) %>%
mutate(sample = metadata_counts$group[match(key, metadata_counts$sample)]) %>%
group_by(bin, sample) %>%
summarise(mean = mean(value)) %>%
ungroup() %>%
spread(sample, mean) %>%
dplyr::select(-bin)
mcols(gr_dam) <- tib
list(gr_lmnb1, gr_dam)
}
PlotDensity <- function(damid, title, xlimits = NULL, replicate = F, conditions = NULL) {
if (replicate) {
tib <- as_tibble(mcols(damid)) %>%
gather() %>%
separate(key, c("condition", "timepoint", "replicate"), remove = F, sep = "_") %>%
mutate(condition = factor(condition, levels = levels(metadata.damid$condition)),
timepoint = factor(timepoint, levels = levels(metadata.damid$timepoint)),
replicate = factor(replicate, levels = paste0("r", 1:10))) %>%
drop_na()
} else {
tib <- as_tibble(mcols(damid)) %>%
gather() %>%
separate(key, c("condition", "timepoint"), remove = F, sep = "_") %>%
mutate(condition = factor(condition, levels = levels(metadata.damid$condition)),
timepoint = factor(timepoint, levels = levels(metadata.damid$timepoint))) %>%
drop_na()
}
nrow <- 2
if (! is.null(conditions)) {
tib <- tib %>%
filter(condition %in% conditions)
nrow <- 1
}
plt <- tib %>%
ggplot(aes(x = value, col = timepoint)) +
#geom_density() +
facet_wrap( ~ condition, nrow = nrow) +
ggtitle(title) +
xlab("DamID score") +
ylab("Density") +
scale_color_brewer(palette = "Dark2") +
theme_bw() +
theme(aspect.ratio = 1)
if (replicate) {
plt <- plt + geom_density(aes(linetype = replicate))
} else {
plt <- plt + geom_density()
}
if (! is.null(xlimits)) {
plt <- plt +
coord_cartesian(xlim = xlimits)
}
plt
}
ScaleDamID <- function(damid) {
tib <- as_tibble(mcols(damid))
tib.scaled <- as_tibble(scale(tib))
mcols(damid) <- tib.scaled
damid
}
grMid <- function(gr) {
start(gr) <- end(gr) <- rowMeans(cbind(start(gr), end(gr)))
gr
}
DistanceToLADBorder <- function(sites, borders, nearest = T, border.class = F) {
# Given a GRanges of damid and LAD borders, calculate the
# distance to the preceding and following LAD. Return a new GRanges
# Make sure the chromosomes are as they should be - especially for the bins
sites <- sites[seqnames(sites) %in% c(paste0("chr", 1:22),
"chrX")]
borders <- borders[seqnames(borders) %in% c(paste0("chr", 1:22),
"chrX")]
# Preceding distance
sites$dis.precede <- sites$strand.precede <-
sites$border.class.precede <- sites$border.ctcf.precede <- NA
idx.precede <- precede(sites, borders, ignore.strand = T, select = "all")
sites$dis.precede[queryHits(idx.precede)] <- distance(sites[queryHits(idx.precede)],
borders[subjectHits(idx.precede)],
ignore.strand = T)
sites$strand.precede[queryHits(idx.precede)] <- strand(borders[subjectHits(idx.precede)])
sites$border.class.precede[queryHits(idx.precede)] <- borders$class[subjectHits(idx.precede)]
sites$border.ctcf.precede[queryHits(idx.precede)] <- borders$CTCF[subjectHits(idx.precede)]
# Following distance
sites$dis.follow <- sites$strand.follow <-
sites$border.class.follow <- sites$border.ctcf.follow <- NA
idx.follow <- follow(sites, borders, ignore.strand = T, select = "all")
sites$dis.follow[queryHits(idx.follow)] <- distance(sites[queryHits(idx.follow)],
borders[subjectHits(idx.follow)],
ignore.strand = T)
sites$strand.follow[queryHits(idx.follow)] <- strand(borders[subjectHits(idx.follow)])
sites$border.class.follow[queryHits(idx.follow)] <- borders$class[subjectHits(idx.follow)]
sites$border.ctcf.follow[queryHits(idx.follow)] <- borders$CTCF[subjectHits(idx.follow)]
# Exception: overlapping (follow = distance (0), precede = NA)
idx.overlap <- findOverlaps(sites, borders, ignore.strand = T)
sites$dis.follow[queryHits(idx.overlap)] <- distance(sites[queryHits(idx.overlap)],
borders[subjectHits(idx.overlap)],
ignore.strand = T)
sites$dis.precede[queryHits(idx.overlap)] <- NA
sites$strand.follow[queryHits(idx.overlap)] <- sites$strand.precede[queryHits(idx.overlap)] <-
strand(borders[subjectHits(idx.overlap)])
sites$border.class.precede[queryHits(idx.overlap)] <-
sites$border.class.follow[queryHits(idx.overlap)] <-
borders$class[subjectHits(idx.overlap)]
sites$border.ctcf.precede[queryHits(idx.overlap)] <-
sites$border.ctcf.follow[queryHits(idx.overlap)] <-
borders$CTCF[subjectHits(idx.overlap)]
# Alternative: only use information from the nearest hit
if (nearest) {
# Remove precede information if follow is smaller
idx.remove.precede <- which(sites$dis.follow < sites$dis.precede)
sites$dis.precede[idx.remove.precede] <- NA
# Remove follow information if precede is smaller
idx.remove.follow <- which(sites$dis.follow > sites$dis.precede)
sites$dis.follow[idx.remove.follow] <- NA
}
sites
}
CountPerBins <- function(sites, bin.size = 10000) {
tib <- as_tibble(mcols(sites)) %>%
add_column(number = 1:nrow(.)) %>%
mutate(dis.precede.group = as.numeric(cut(dis.precede,
breaks = seq(0, max(dis.precede, na.rm = T) + 1,
by = bin.size))) - 1,
dis.follow.group = as.numeric(cut(dis.follow,
breaks = seq(0, max(dis.follow, na.rm = T) + 1,
by = bin.size))) - 1) %>%
dplyr::select(-dis.precede, -dis.follow) %>%
mutate(dis.precede.group = ifelse(strand.precede == "+",
-dis.precede.group, dis.precede.group),
dis.follow.group = ifelse(strand.follow == "+",
dis.follow.group, -dis.follow.group)) %>%
dplyr::select(-strand.precede, -strand.follow) %>%
gather(key, value, dis.precede.group, dis.follow.group) %>%
drop_na() %>%
mutate(border.class = ifelse(key == "dis.precede.group",
border.class.precede, border.class.follow),
border.ctcf = ifelse(key == "dis.precede.group",
border.ctcf.precede, border.ctcf.follow)) %>%
dplyr::select(-border.class.precede, -border.class.follow,
-border.ctcf.precede, -border.ctcf.follow) %>%
gather(sample, score, -number, -key, -value, -border.class, -border.ctcf) %>%
group_by(value, border.class, border.ctcf, sample) %>%
summarise(count = n(),
mean = mean(score)) %>%
ungroup() %>%
mutate(sample = factor(sample, levels = levels(metadata.damid$sample)),
border.ctcf = factor(border.ctcf, levels = c("nonCTCF", "CTCF")),
border.class = factor(border.class, levels = c("shared", "unique"))) %>%
arrange(sample, border.class, border.ctcf)
tib
}
PlotDamIDScoresAndDifferences <- function(damid.summary,
xlimits = c(-0.4, 0.4),
ylimits.diff = c(-0.3, 0.2),
extra_grouping = NULL) {
tib <- damid.summary %>%
mutate(mean = runmean(mean, k = 5)) %>%
group_by_at(c("sample", "value", extra_grouping)) %>%
summarise(combined.count = sum(count),
combined.mean = weighted.mean(mean, count)) %>%
filter(combined.count > 20) %>%
separate(sample, c("condition", "timepoint"), remove = F) %>%
mutate(condition = factor(condition, levels = levels(metadata.damid$condition)),
timepoint = factor(timepoint, levels = levels(metadata.damid$timepoint))) %>%
ungroup()
plt <- tib %>%
ggplot(aes(x = value / 100, y = combined.mean, col = border.ctcf)) +
annotate("rect", xmin = 0, xmax = 1e3, ymin = -1, ymax = 5,
fill = "grey", alpha = 0.3) +
geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
geom_line(size = 1) +
facet_grid(as.formula(paste(paste(c("condition"),
collapse = " + "),
"~", "timepoint"))) +
ggtitle("DamID scores at LAD borders") +
xlab("Distance from LAD border (Mb)") +
ylab("DamID (z-score)") +
coord_cartesian(xlim = xlimits, ylim = c(-1, 1)) +
scale_color_manual(values = c("blue", "red")) +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, hjust = 1))
plot(plt)
# Also, plot the difference between 0h and the others
tib.difference <- tib %>%
dplyr::select(-sample, -combined.count) %>%
spread(key = timepoint, value = combined.mean) %>%
mutate(diff.6h = `6h` - `0h`,
diff.24h = `24h` - `0h`,
diff.96h = `96h` - `0h`) %>%
dplyr::select(-`0h`, -`6h`, -`24h`, -`96h`) %>%
gather(timepoint, combined.difference, diff.6h, diff.24h, diff.96h) %>%
mutate(timepoint = factor(timepoint, levels = c("diff.6h", "diff.24h", "diff.96h")))
plt <- tib.difference %>%
ggplot(aes(x = value / 100, y = combined.difference, col = border.ctcf)) +
annotate("rect", xmin = 0, xmax = 1e3, ymin = -1, ymax = 5,
fill = "grey", alpha = 0.3) +
geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
geom_line(size = 1) +
facet_grid(as.formula(paste(paste(c("condition"),
collapse = " + "),
"~", "timepoint"))) +
ggtitle("DamID difference at LAD borders") +
xlab("Distance from LAD border (Mb)") +
ylab("DamID difference (z-score)") +
coord_cartesian(xlim = xlimits, ylim = ylimits.diff) +
scale_color_manual(values = c("blue", "red")) +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, hjust = 1))
plot(plt)
# Also, plot the difference between CTCF and non-CTCF borders
tib.difference <- tib %>%
dplyr::select(-sample, -combined.count) %>%
spread(key = border.ctcf, value = combined.mean) %>%
mutate(diff = CTCF - nonCTCF) %>%
dplyr::select(-nonCTCF, -CTCF)
plt <- tib.difference %>%
ggplot(aes(x = value / 100, y = diff, col = timepoint)) +
annotate("rect", xmin = 0, xmax = 1e3, ymin = -1, ymax = 5,
fill = "grey", alpha = 0.3) +
geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
geom_line(size = 1) +
facet_wrap( ~ condition, nrow = 2) +
ggtitle("DamID difference at CTCF borders") +
xlab("Distance from LAD border (Mb)") +
ylab("DamID difference (z-score)") +
coord_cartesian(xlim = xlimits, ylim = ylimits.diff) +
scale_color_brewer(palette = "Set1") +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, hjust = 1))
plot(plt)
}Load the required data. First, the previous data sets that I need.
# See aboveAlso, load DamID data. I will work with combined data tracks of replicates.
Also, load the counts. These can be used to illustrate whether any effects are caused by a change in accessibility or Lamin B1 reads.
# Prepare DamID metadata
metadata.damid <- tibble(dir(damid.dir, pattern = "combined.*norm.txt.gz"),
.name_repair = ~ c("file")) %>%
mutate(sample = str_remove(str_remove(file, "pADamID_"),
paste0("-", bin.size, ".*"))) %>%
mutate(sample = str_remove(sample, "^NQ_")) %>%
mutate(sample = str_replace_all(sample, "-", "")) %>%
separate(sample, c("condition", "timepoint"), remove = F, sep = "_") %>%
mutate(condition = factor(condition, levels = c("PT", "CTCFEL", "CTCFNQ",
"CTCF", "RAD21", "WAPL", "CTCFWAPL")),
timepoint = factor(timepoint, levels = c("0h", "6h", "24h", "96h"))) %>%
arrange(condition, timepoint) %>%
add_column(cell = factor("mESC")) %>%
mutate(sample = factor(sample, levels = unique(sample))) %>%
drop_na()
# Load DamID
damid <- LoadDamID(metadata.damid, damid.dir)Before I can continue with the DamID data, I want to convert the values first to z-scores. Also, make density plots.
# Scale DamID
damid <- ScaleDamID(damid)I need to add the distance to LAD border (and which LAD border) to the DamID data. First, filter LAD borders for positioning near active genes.
Note that most of these analyses are old and removed. Instead, see below for better analyses of LAD borders, where I process all borders individually.
# Only for borders without genes
borders <- borders[borders$ovl_gene == T] Having these distances, I can now start plotting the DamID profile around LAD borders. Also, I can plot the difference between the later timepoints and 0h.
The previous plots are all based on average effects. Here, I will try to work with individual LAD borders, to see if the real differences are only for a subset of CTCF borders.
First, I will determine the distance to LAD borders for all genomic bins. Use LAD borders defined in the pA-DamID data for the parental clone.
GatherBorders <- function(damid, borders, lads) {
# Get the distances to the nearest LAD borders for all damid bins
damid.mid <- damid
start(damid.mid) <- end(damid.mid) <- (start(damid.mid) + end(damid.mid)) / 2
dis <- as_tibble(distanceToNearest(damid.mid, borders))
# Round distances to the nearest 5kb
dis <- dis %>%
mutate(distance = ceiling(distance / 5000) * 5000)
# Also, determine which bins overlap with lads
ovl <- damid.mid %over% lads
# Process data as tibble
tib <- as_tibble(damid.mid) %>%
add_column(border_idx = dis$subjectHits,
border_ctcf = borders$CTCF[dis$subjectHits],
border_ctcf_n = borders$CTCF.count[dis$subjectHits],
border_ctcf_strand = borders$CTCF_strand[dis$subjectHits],
distance = dis$distance,
within_lad = ovl) %>%
mutate(distance = case_when(within_lad == 1 ~ distance,
T ~ -distance),
border_ctcf_strand = factor(border_ctcf_strand,
levels = c("outwards", "inwards",
"ambiguous", "nonCTCF")),
border_ctcf_n = case_when(border_ctcf_n >= 3 ~ ">=3",
border_ctcf_n == 2 ~ "2",
border_ctcf_n == 1 ~ "1",
T ~ "nonCTCF"),
border_ctcf_n = factor(border_ctcf_n,
levels = c("nonCTCF", "1", "2", ">=3"))) %>%
filter(abs(distance) < 2e5)
# # Only work with "complete" borders (remove small iLADs / LADs)
# borders_complete <- which(as.numeric(table(tib$border_idx)) > 25)
#
# tib <- tib %>%
# filter(border_idx %in% borders_complete)
# Plot all lines as "test"
tib %>%
ggplot(aes(x = distance / 1e3, y = PT_0h)) +
#geom_line(aes(group = border_idx), alpha = 0.1) +
geom_smooth(aes(group = border_ctcf, col = border_ctcf), se = T) +
xlab("Distance to LAD border (kb)") +
ylab("pA-DamID (z-score)") +
theme_bw() +
theme(aspect.ratio = 1)
tib
}
tib_damid <- GatherBorders(damid, borders = borders,
lads = LADs[["mESC_pA_PT"]])Next, plot the effect of depletion at LAD borders. I will prepare several figures / results here:
Not only do this for depletions at +/- CTCF borders, but also based on CTCF strand and CTCF count. Also, for the unnormalized data (as cpm) to illustrate whether the effects are caused by differences in LaminB1 or Dam signal.
sd_fun <- function(x, y = 1.3) {
return(data.frame(y = y, label = round(sd(x), 2)))
}
PlotBordersWithConfidenceIntervals <- function(tib, samples,
ylim = c(-0.9, 0.65),
smooth = 1,
group = "border_ctcf",
keep_PT = F,
filter_96h = T) {
if (filter_96h) {
samples <- samples[! grepl("96h", samples)]
}
# Gather tib
if (smooth != 1) {
tib <- tib %>%
mutate_at(vars(ends_with("h")), runmean, k = smooth, endrule = "mean")
}
tib_gather <- tib %>%
gather(key, value,
grep("[0-9]+h$", names(tib), value = T)) %>%
mutate(key = factor(key, levels = levels(metadata.damid$sample)))
tib_gather$group <- tib_gather %>% pull(group)
if (keep_PT) {
plt <- tib_gather %>%
filter(key %in% samples) %>%
ggplot(aes(x = distance / 1e3, y = value,
group = key, col = key, fill = key)) +
annotate("rect", xmin = 0, xmax = 300, ymin = -10, ymax = 10,
fill = "grey", alpha = 0.3) +
geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
#geom_line(aes(group = border_idx), alpha = 0.1) +
#geom_smooth(aes(group = key, col = key), se = T) +
stat_summary(fun = mean, geom = "line", size = 1) +
stat_summary(fun.data = mean_se, geom = "ribbon", alpha = 0.25, col = NA,
fun.args = list(mult = 1.96)) +
#facet_grid(. ~ border_ctcf) +
facet_grid(group ~ .) +
xlab("Distance to LAD border (kb)") +
ylab("pA-DamID (z-score)") +
coord_cartesian(xlim = c(-200, 200), ylim = ylim, expand = F) +
scale_color_brewer(palette = "Set1", name = "Border class") +
scale_fill_brewer(palette = "Set1", name = "Border class", guide = "none") +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, hjust = 1))
plot(plt)
samples_without_pt <- samples
if (length(samples) == 1) return(NULL)
} else {
# Plot some tracks
plt <- tib_gather %>%
filter(key %in% samples & key != "PT_0h") %>%
ggplot(aes(x = distance / 1e3, y = value,
group = key, col = key, fill = key)) +
annotate("rect", xmin = 0, xmax = 300, ymin = -10, ymax = 10,
fill = "grey", alpha = 0.3) +
geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
#geom_line(aes(group = border_idx), alpha = 0.1) +
#geom_smooth(aes(group = key, col = key), se = T) +
stat_summary(fun = mean, geom = "line", size = 1) +
stat_summary(fun.data = mean_se, geom = "ribbon", alpha = 0.25, col = NA,
fun.args = list(mult = 1.96)) +
#facet_grid(. ~ border_ctcf) +
facet_grid(group ~ .) +
xlab("Distance to LAD border (kb)") +
ylab("pA-DamID (z-score)") +
coord_cartesian(xlim = c(-200, 200), ylim = ylim, expand = F) +
scale_color_brewer(palette = "Set1", name = "Border class") +
scale_fill_brewer(palette = "Set1", name = "Border class", guide = "none") +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, hjust = 1))
plot(plt)
samples_without_pt <- samples[2:length(samples)]
}
# # Can I also quantify the difference within the LAD?
# tib_ctcf <- tib %>%
# filter(distance > 20e3 & distance < 120e3) %>%
# dplyr::select(border_idx, all_of(samples_without_pt), all_of(group)) %>%
# dplyr::rename_at(vars(group), ~ "group") %>%
# group_by(border_idx, group) %>%
# summarise_at(samples_without_pt, mean, na.rm = T) %>%
# ungroup() %>%
# dplyr::rename_at(samples_without_pt[1], ~c("base")) %>%
# mutate_at(samples_without_pt[2:length(samples_without_pt)],
# function(x) x - .$base) %>%
# dplyr::select(border_idx, group,
# samples_without_pt[2:length(samples_without_pt)]) %>%
# gather(key, value, -group, -border_idx) %>%
# mutate(key = factor(key, samples_without_pt),
# group = factor(group, levels = c("nonCTCF",
# "CTCF",
# "outwards", "inwards", "ambiguous",
# "1", "2", ">=3")))
#
# # Plot by time point
# plt <- tib_ctcf %>%
# ggplot(aes(x = group, y = value, col = group)) +
# geom_quasirandom() +
# geom_boxplot(fill = NA, col = "black", outlier.shape = NA) +
# geom_hline(yintercept = 0, col = "darkgrey", linetype = "dashed") +
# facet_grid(. ~ key) +
# coord_cartesian(ylim = c(-1.5, 1.5)) +
# scale_color_brewer(palette = "Set2", guide = "none") +
# xlab("") +
# ylab("Difference within LAD with t=0h") +
# theme_bw() +
# theme(aspect.ratio = 1.5,
# axis.text.x = element_text(angle = 90, hjust = 1))
# plot(plt)
# Can I also quantify the difference between the local CTCF depletion?
tib_ctcf <- tib %>%
filter(distance > -20e3 & distance < 0) %>%
dplyr::select(border_idx, all_of(samples_without_pt), all_of(group)) %>%
dplyr::rename_at(vars(group), ~ "group") %>%
group_by(border_idx, group) %>%
summarise_at(samples_without_pt, mean, na.rm = T) %>%
ungroup() %>%
dplyr::rename_at(samples_without_pt[1], ~c("base")) %>%
mutate_at(samples_without_pt[2:length(samples_without_pt)],
function(x) x - .$base) %>%
dplyr::select(border_idx, group,
samples_without_pt[2:length(samples_without_pt)]) %>%
gather(key, value, -group, -border_idx) %>%
mutate(key = factor(key, samples_without_pt),
group = factor(group, levels = c("nonCTCF",
"CTCF",
"outwards", "inwards", "ambiguous",
"1", "2", ">=3")))
# Plot by time point
plt <- tib_ctcf %>%
ggplot(aes(x = group, y = value, col = group)) +
geom_quasirandom() +
geom_boxplot(fill = NA, col = "black", outlier.shape = NA) +
geom_hline(yintercept = 0, col = "darkgrey", linetype = "dashed") +
stat_summary(fun.data = sd_fun, geom = "text", col = "black") +
facet_grid(. ~ key) +
coord_cartesian(ylim = c(-1.5, 1.5)) +
scale_color_brewer(palette = "Set2", guide = "none") +
xlab("") +
ylab("Difference outside LAD border with t=0h") +
theme_bw() +
theme(aspect.ratio = 1.5,
axis.text.x = element_text(angle = 90, hjust = 1))
plot(plt)
# # Plot by border class
# plt <- tib_ctcf %>%
# ggplot(aes(x = key, y = value, col = key)) +
# geom_quasirandom() +
# geom_boxplot(fill = NA, col = "black", outlier.shape = NA) +
# geom_hline(yintercept = 0, col = "darkgrey", linetype = "dashed") +
# facet_grid(. ~ group) +
# scale_color_brewer(palette = "Set1", guide = F) +
# xlab("") +
# ylab("Difference outside LAD border with t=0h") +
# theme_bw() +
# theme(aspect.ratio = 1.5,
# axis.text.x = element_text(angle = 90, hjust = 1))
# plot(plt)
# Statistics - difference from diff = 0
tib_ctcf %>%
group_by(group, key) %>%
dplyr::summarise(pvalue = wilcox.test(value)$p.value) %>%
ungroup() %>%
mutate(padj = p.adjust(pvalue),
sign = padj < 0.05) %>%
print(n = 40)
# Statistics - difference with nonCTCF borders
tib_stat <- tibble()
for (current_group in levels(tib_ctcf$group)) {
for (current_key in levels(tib_ctcf$key)) {
if (! current_group %in% tib_ctcf$group) next
if (current_group %in% "nonCTCF") next
if (! current_key %in% tib_ctcf$key) next
tmp <- wilcox.test(tib_ctcf %>%
filter(group == current_group &
key == current_key) %>%
pull(value),
tib_ctcf %>%
filter(group == "nonCTCF" &
key == current_key) %>%
pull(value))
tib_stat <- bind_rows(tib_stat,
tibble(group = current_group,
key = current_key,
pvalue = tmp$p.value))
}
}
tib_stat %>%
mutate(padj = p.adjust(pvalue),
sign = padj < 0.05) %>%
print(n = 40)
tib_stat
}
# CTCF vs non-CTCF
PlotBordersWithConfidenceIntervals(tib_damid,
c("PT_0h", "CTCFEL_0h",
"RAD21_0h", "WAPL_0h",
"CTCFWAPL_0h"),
keep_PT = T)## # A tibble: 8 × 5
## group key pvalue padj sign
## <fct> <fct> <dbl> <dbl> <lgl>
## 1 nonCTCF CTCFEL_0h 2.87e- 3 0.0172 TRUE
## 2 nonCTCF RAD21_0h 3.14e- 1 0.628 FALSE
## 3 nonCTCF WAPL_0h 2.05e- 1 0.614 FALSE
## 4 nonCTCF CTCFWAPL_0h 9.39e- 3 0.0376 TRUE
## 5 CTCF CTCFEL_0h 3.19e- 1 0.628 FALSE
## 6 CTCF RAD21_0h 1.51e- 6 0.0000106 TRUE
## 7 CTCF WAPL_0h 5.45e-10 0.00000000436 TRUE
## 8 CTCF CTCFWAPL_0h 4.45e- 3 0.0223 TRUE
## # A tibble: 4 × 5
## group key pvalue padj sign
## <chr> <chr> <dbl> <dbl> <lgl>
## 1 CTCF CTCFEL_0h 0.00637 0.00756 TRUE
## 2 CTCF RAD21_0h 0.00378 0.00756 TRUE
## 3 CTCF WAPL_0h 0.0000236 0.0000942 TRUE
## 4 CTCF CTCFWAPL_0h 0.000111 0.000334 TRUE
## # A tibble: 4 × 3
## group key pvalue
## <chr> <chr> <dbl>
## 1 CTCF CTCFEL_0h 0.00637
## 2 CTCF RAD21_0h 0.00378
## 3 CTCF WAPL_0h 0.0000236
## 4 CTCF CTCFWAPL_0h 0.000111
tib_stat <- tibble()
tmp <- PlotBordersWithConfidenceIntervals(tib_damid,
c("PT_0h", "PT_24h"),
keep_PT = T, ylim = c(-0.8, 0.8))## # A tibble: 2 × 5
## group key pvalue padj sign
## <fct> <fct> <dbl> <dbl> <lgl>
## 1 nonCTCF PT_24h 0.154 0.154 FALSE
## 2 CTCF PT_24h 0.0000526 0.000105 TRUE
## # A tibble: 1 × 5
## group key pvalue padj sign
## <chr> <chr> <dbl> <dbl> <lgl>
## 1 CTCF PT_24h 0.0693 0.0693 FALSE
tib_stat <- bind_rows(tib_stat, tmp)
PlotBordersWithConfidenceIntervals(tib_damid,
c("PT_0h", "PT_24h", "CTCFEL_0h", "CTCFEL_6h",
"CTCFEL_24h", "CTCFEL_96h"),
keep_PT = T, ylim = c(-0.8, 0.8))## # A tibble: 8 × 5
## group key pvalue padj sign
## <fct> <fct> <dbl> <dbl> <lgl>
## 1 nonCTCF PT_24h 0.154 0.768 FALSE
## 2 nonCTCF CTCFEL_0h 0.00287 0.0201 TRUE
## 3 nonCTCF CTCFEL_6h 0.0393 0.236 FALSE
## 4 nonCTCF CTCFEL_24h 0.159 0.768 FALSE
## 5 CTCF PT_24h 0.0000526 0.000421 TRUE
## 6 CTCF CTCFEL_0h 0.319 0.768 FALSE
## 7 CTCF CTCFEL_6h 0.163 0.768 FALSE
## 8 CTCF CTCFEL_24h 0.159 0.768 FALSE
## # A tibble: 4 × 5
## group key pvalue padj sign
## <chr> <chr> <dbl> <dbl> <lgl>
## 1 CTCF PT_24h 0.0693 0.208 FALSE
## 2 CTCF CTCFEL_0h 0.00637 0.0255 TRUE
## 3 CTCF CTCFEL_6h 0.740 1 FALSE
## 4 CTCF CTCFEL_24h 0.893 1 FALSE
## # A tibble: 4 × 3
## group key pvalue
## <chr> <chr> <dbl>
## 1 CTCF PT_24h 0.0693
## 2 CTCF CTCFEL_0h 0.00637
## 3 CTCF CTCFEL_6h 0.740
## 4 CTCF CTCFEL_24h 0.893
tmp <- PlotBordersWithConfidenceIntervals(tib_damid,
c("PT_0h", "CTCFEL_0h", "CTCFEL_6h",
"CTCFEL_24h", "CTCFEL_96h"),
ylim = c(-0.8, 0.8))## # A tibble: 4 × 5
## group key pvalue padj sign
## <fct> <fct> <dbl> <dbl> <lgl>
## 1 nonCTCF CTCFEL_6h 0.0770 0.0770 FALSE
## 2 nonCTCF CTCFEL_24h 0.0266 0.0532 FALSE
## 3 CTCF CTCFEL_6h 0.00306 0.00918 TRUE
## 4 CTCF CTCFEL_24h 0.000732 0.00293 TRUE
## # A tibble: 2 × 5
## group key pvalue padj sign
## <chr> <chr> <dbl> <dbl> <lgl>
## 1 CTCF CTCFEL_6h 0.000538 0.000538 TRUE
## 2 CTCF CTCFEL_24h 0.0000642 0.000128 TRUE
tib_stat <- bind_rows(tib_stat, tmp)
PlotBordersWithConfidenceIntervals(tib_damid,
c("PT_0h", "CTCFNQ_0h", "CTCFNQ_6h",
"CTCFNQ_24h", "CTCFNQ_96h"))## # A tibble: 4 × 5
## group key pvalue padj sign
## <fct> <fct> <dbl> <dbl> <lgl>
## 1 nonCTCF CTCFNQ_6h 0.00186 0.00357 TRUE
## 2 nonCTCF CTCFNQ_24h 0.000000175 0.000000699 TRUE
## 3 CTCF CTCFNQ_6h 0.00179 0.00357 TRUE
## 4 CTCF CTCFNQ_24h 0.000122 0.000365 TRUE
## # A tibble: 2 × 5
## group key pvalue padj sign
## <chr> <chr> <dbl> <dbl> <lgl>
## 1 CTCF CTCFNQ_6h 0.897 1 FALSE
## 2 CTCF CTCFNQ_24h 0.694 1 FALSE
## # A tibble: 2 × 3
## group key pvalue
## <chr> <chr> <dbl>
## 1 CTCF CTCFNQ_6h 0.897
## 2 CTCF CTCFNQ_24h 0.694
tmp <- PlotBordersWithConfidenceIntervals(tib_damid,
c("PT_0h", "WAPL_0h", "WAPL_6h",
"WAPL_24h", "WAPL_96h"),
ylim = c(-0.95, 0.7))## # A tibble: 4 × 5
## group key pvalue padj sign
## <fct> <fct> <dbl> <dbl> <lgl>
## 1 nonCTCF WAPL_6h 0.00563 0.00563 TRUE
## 2 nonCTCF WAPL_24h 0.00000217 0.00000434 TRUE
## 3 CTCF WAPL_6h 0.0000000983 0.000000295 TRUE
## 4 CTCF WAPL_24h 0.00000000192 0.00000000768 TRUE
## # A tibble: 2 × 5
## group key pvalue padj sign
## <chr> <chr> <dbl> <dbl> <lgl>
## 1 CTCF WAPL_6h 0.00292 0.00583 TRUE
## 2 CTCF WAPL_24h 0.0111 0.0111 TRUE
tib_stat <- bind_rows(tib_stat, tmp)
tmp <- PlotBordersWithConfidenceIntervals(tib_damid,
c("PT_0h", "CTCFWAPL_0h", "CTCFWAPL_6h",
"CTCFWAPL_24h", "CTCFWAPL_96h"),
ylim = c(-0.95, 0.7))## # A tibble: 4 × 5
## group key pvalue padj sign
## <fct> <fct> <dbl> <dbl> <lgl>
## 1 nonCTCF CTCFWAPL_6h 0.506 1 FALSE
## 2 nonCTCF CTCFWAPL_24h 0.988 1 FALSE
## 3 CTCF CTCFWAPL_6h 0.00133 0.00533 TRUE
## 4 CTCF CTCFWAPL_24h 0.0228 0.0684 FALSE
## # A tibble: 2 × 5
## group key pvalue padj sign
## <chr> <chr> <dbl> <dbl> <lgl>
## 1 CTCF CTCFWAPL_6h 0.0357 0.0714 FALSE
## 2 CTCF CTCFWAPL_24h 0.0727 0.0727 FALSE
tib_stat <- bind_rows(tib_stat, tmp)
tmp <- PlotBordersWithConfidenceIntervals(tib_damid,
c("PT_0h", "RAD21_0h", "RAD21_6h",
"RAD21_24h"),
ylim = c(-0.95, 0.7))## # A tibble: 4 × 5
## group key pvalue padj sign
## <fct> <fct> <dbl> <dbl> <lgl>
## 1 nonCTCF RAD21_6h 0.000121 0.000363 TRUE
## 2 nonCTCF RAD21_24h 0.0000471 0.000188 TRUE
## 3 CTCF RAD21_6h 0.00245 0.00489 TRUE
## 4 CTCF RAD21_24h 0.300 0.300 FALSE
## # A tibble: 2 × 5
## group key pvalue padj sign
## <chr> <chr> <dbl> <dbl> <lgl>
## 1 CTCF RAD21_6h 0.396 0.396 FALSE
## 2 CTCF RAD21_24h 0.0329 0.0658 FALSE
tib_stat <- bind_rows(tib_stat, tmp)
tib_stat %>%
mutate(padj = p.adjust(pvalue),
sign = padj < 0.05) %>%
print(n = 40)## # A tibble: 9 × 5
## group key pvalue padj sign
## <chr> <chr> <dbl> <dbl> <lgl>
## 1 CTCF PT_24h 0.0693 0.208 FALSE
## 2 CTCF CTCFEL_6h 0.000538 0.00431 TRUE
## 3 CTCF CTCFEL_24h 0.0000642 0.000578 TRUE
## 4 CTCF WAPL_6h 0.00292 0.0204 TRUE
## 5 CTCF WAPL_24h 0.0111 0.0668 FALSE
## 6 CTCF CTCFWAPL_6h 0.0357 0.164 FALSE
## 7 CTCF CTCFWAPL_24h 0.0727 0.208 FALSE
## 8 CTCF RAD21_6h 0.396 0.396 FALSE
## 9 CTCF RAD21_24h 0.0329 0.164 FALSE
# CTCF + orientation vs non-CTCF
tib_stat <- tibble()
PlotBordersWithConfidenceIntervals(tib_damid,
c("PT_0h"),
group = "border_ctcf_strand",
ylim = c(-0.80, 0.80),
keep_PT = T)## NULL
tmp <- PlotBordersWithConfidenceIntervals(tib_damid,
c("PT_0h", "PT_24h"),
group = "border_ctcf_strand",
ylim = c(-0.80, 0.80),
keep_PT = T)## # A tibble: 4 × 5
## group key pvalue padj sign
## <fct> <fct> <dbl> <dbl> <lgl>
## 1 nonCTCF PT_24h 0.154 0.307 FALSE
## 2 outwards PT_24h 0.221 0.307 FALSE
## 3 inwards PT_24h 0.00861 0.0258 TRUE
## 4 ambiguous PT_24h 0.00284 0.0114 TRUE
## # A tibble: 3 × 5
## group key pvalue padj sign
## <chr> <chr> <dbl> <dbl> <lgl>
## 1 outwards PT_24h 0.544 0.544 FALSE
## 2 inwards PT_24h 0.224 0.447 FALSE
## 3 ambiguous PT_24h 0.0920 0.276 FALSE
tib_stat <- bind_rows(tib_stat, tmp)
tmp <- PlotBordersWithConfidenceIntervals(tib_damid,
c("PT_0h", "CTCFEL_0h", "CTCFEL_6h",
"CTCFEL_24h", "CTCFEL_96h"),
group = "border_ctcf_strand",
ylim = c(-0.80, 0.80))## # A tibble: 8 × 5
## group key pvalue padj sign
## <fct> <fct> <dbl> <dbl> <lgl>
## 1 nonCTCF CTCFEL_6h 0.0770 0.383 FALSE
## 2 nonCTCF CTCFEL_24h 0.0266 0.186 FALSE
## 3 outwards CTCFEL_6h 0.168 0.383 FALSE
## 4 outwards CTCFEL_24h 0.235 0.383 FALSE
## 5 inwards CTCFEL_6h 0.104 0.383 FALSE
## 6 inwards CTCFEL_24h 0.0765 0.383 FALSE
## 7 ambiguous CTCFEL_6h 0.0396 0.238 FALSE
## 8 ambiguous CTCFEL_24h 0.00817 0.0653 FALSE
## # A tibble: 6 × 5
## group key pvalue padj sign
## <chr> <chr> <dbl> <dbl> <lgl>
## 1 outwards CTCFEL_6h 0.0478 0.0945 FALSE
## 2 outwards CTCFEL_24h 0.0473 0.0945 FALSE
## 3 inwards CTCFEL_6h 0.0231 0.0693 FALSE
## 4 inwards CTCFEL_24h 0.00662 0.0315 TRUE
## 5 ambiguous CTCFEL_6h 0.00630 0.0315 TRUE
## 6 ambiguous CTCFEL_24h 0.00108 0.00646 TRUE
tib_stat <- bind_rows(tib_stat, tmp)
PlotBordersWithConfidenceIntervals(tib_damid,
c("PT_0h", "CTCFNQ_0h", "CTCFNQ_6h",
"CTCFNQ_24h", "CTCFNQ_96h"),
group = "border_ctcf_strand",
ylim = c(-0.80, 0.80))## # A tibble: 8 × 5
## group key pvalue padj sign
## <fct> <fct> <dbl> <dbl> <lgl>
## 1 nonCTCF CTCFNQ_6h 0.00186 0.0130 TRUE
## 2 nonCTCF CTCFNQ_24h 0.000000175 0.00000140 TRUE
## 3 outwards CTCFNQ_6h 0.0146 0.0691 FALSE
## 4 outwards CTCFNQ_24h 0.0138 0.0691 FALSE
## 5 inwards CTCFNQ_6h 0.0152 0.0691 FALSE
## 6 inwards CTCFNQ_24h 0.0912 0.182 FALSE
## 7 ambiguous CTCFNQ_6h 0.444 0.444 FALSE
## 8 ambiguous CTCFNQ_24h 0.00804 0.0482 TRUE
## # A tibble: 6 × 5
## group key pvalue padj sign
## <chr> <chr> <dbl> <dbl> <lgl>
## 1 outwards CTCFNQ_6h 0.385 1 FALSE
## 2 outwards CTCFNQ_24h 0.458 1 FALSE
## 3 inwards CTCFNQ_6h 0.356 1 FALSE
## 4 inwards CTCFNQ_24h 0.480 1 FALSE
## 5 ambiguous CTCFNQ_6h 0.284 1 FALSE
## 6 ambiguous CTCFNQ_24h 0.576 1 FALSE
## # A tibble: 6 × 3
## group key pvalue
## <chr> <chr> <dbl>
## 1 outwards CTCFNQ_6h 0.385
## 2 outwards CTCFNQ_24h 0.458
## 3 inwards CTCFNQ_6h 0.356
## 4 inwards CTCFNQ_24h 0.480
## 5 ambiguous CTCFNQ_6h 0.284
## 6 ambiguous CTCFNQ_24h 0.576
tmp <- PlotBordersWithConfidenceIntervals(tib_damid,
c("PT_0h", "WAPL_0h", "WAPL_6h",
"WAPL_24h", "WAPL_96h"),
group = "border_ctcf_strand",
ylim = c(-1.05, 0.75))## # A tibble: 8 × 5
## group key pvalue padj sign
## <fct> <fct> <dbl> <dbl> <lgl>
## 1 nonCTCF WAPL_6h 0.00563 0.0169 TRUE
## 2 nonCTCF WAPL_24h 0.00000217 0.0000152 TRUE
## 3 outwards WAPL_6h 0.388 0.388 FALSE
## 4 outwards WAPL_24h 0.134 0.268 FALSE
## 5 inwards WAPL_6h 0.0000349 0.000209 TRUE
## 6 inwards WAPL_24h 0.00000156 0.0000124 TRUE
## 7 ambiguous WAPL_6h 0.0000879 0.000439 TRUE
## 8 ambiguous WAPL_24h 0.000120 0.000479 TRUE
## # A tibble: 6 × 5
## group key pvalue padj sign
## <chr> <chr> <dbl> <dbl> <lgl>
## 1 outwards WAPL_6h 0.901 1 FALSE
## 2 outwards WAPL_24h 0.847 1 FALSE
## 3 inwards WAPL_6h 0.00512 0.0256 TRUE
## 4 inwards WAPL_24h 0.000775 0.00465 TRUE
## 5 ambiguous WAPL_6h 0.00783 0.0313 TRUE
## 6 ambiguous WAPL_24h 0.153 0.459 FALSE
tib_stat <- bind_rows(tib_stat, tmp)
tmp <- PlotBordersWithConfidenceIntervals(tib_damid,
c("PT_0h", "CTCFWAPL_0h", "CTCFWAPL_6h",
"CTCFWAPL_24h", "CTCFWAPL_96h"),
group = "border_ctcf_strand",
ylim = c(-1.05, 0.75))## # A tibble: 8 × 5
## group key pvalue padj sign
## <fct> <fct> <dbl> <dbl> <lgl>
## 1 nonCTCF CTCFWAPL_6h 0.506 1 FALSE
## 2 nonCTCF CTCFWAPL_24h 0.988 1 FALSE
## 3 outwards CTCFWAPL_6h 0.0526 0.368 FALSE
## 4 outwards CTCFWAPL_24h 0.543 1 FALSE
## 5 inwards CTCFWAPL_6h 0.112 0.560 FALSE
## 6 inwards CTCFWAPL_24h 0.157 0.627 FALSE
## 7 ambiguous CTCFWAPL_6h 0.0396 0.317 FALSE
## 8 ambiguous CTCFWAPL_24h 0.0639 0.383 FALSE
## # A tibble: 6 × 5
## group key pvalue padj sign
## <chr> <chr> <dbl> <dbl> <lgl>
## 1 outwards CTCFWAPL_6h 0.111 0.555 FALSE
## 2 outwards CTCFWAPL_24h 0.574 0.621 FALSE
## 3 inwards CTCFWAPL_6h 0.207 0.621 FALSE
## 4 inwards CTCFWAPL_24h 0.259 0.621 FALSE
## 5 ambiguous CTCFWAPL_6h 0.133 0.555 FALSE
## 6 ambiguous CTCFWAPL_24h 0.0806 0.484 FALSE
tib_stat <- bind_rows(tib_stat, tmp)
tmp <- PlotBordersWithConfidenceIntervals(tib_damid,
c("PT_0h", "RAD21_0h", "RAD21_6h",
"RAD21_24h"),
group = "border_ctcf_strand",
ylim = c(-1.05, 0.75))## # A tibble: 8 × 5
## group key pvalue padj sign
## <fct> <fct> <dbl> <dbl> <lgl>
## 1 nonCTCF RAD21_6h 0.000121 0.000847 TRUE
## 2 nonCTCF RAD21_24h 0.0000471 0.000377 TRUE
## 3 outwards RAD21_6h 0.113 0.563 FALSE
## 4 outwards RAD21_24h 0.263 0.789 FALSE
## 5 inwards RAD21_6h 0.151 0.606 FALSE
## 6 inwards RAD21_24h 0.705 1 FALSE
## 7 ambiguous RAD21_6h 0.0356 0.214 FALSE
## 8 ambiguous RAD21_24h 0.605 1 FALSE
## # A tibble: 6 × 5
## group key pvalue padj sign
## <chr> <chr> <dbl> <dbl> <lgl>
## 1 outwards RAD21_6h 0.934 1 FALSE
## 2 outwards RAD21_24h 0.333 1 FALSE
## 3 inwards RAD21_6h 0.399 1 FALSE
## 4 inwards RAD21_24h 0.119 0.593 FALSE
## 5 ambiguous RAD21_6h 0.489 1 FALSE
## 6 ambiguous RAD21_24h 0.0868 0.521 FALSE
tib_stat <- bind_rows(tib_stat, tmp)
tib_stat %>%
mutate(padj = p.adjust(pvalue),
sign = padj < 0.05) %>%
print(n = 40) ## # A tibble: 27 × 5
## group key pvalue padj sign
## <chr> <chr> <dbl> <dbl> <lgl>
## 1 outwards PT_24h 0.544 1 FALSE
## 2 inwards PT_24h 0.224 1 FALSE
## 3 ambiguous PT_24h 0.0920 1 FALSE
## 4 outwards CTCFEL_6h 0.0478 0.945 FALSE
## 5 outwards CTCFEL_24h 0.0473 0.945 FALSE
## 6 inwards CTCFEL_6h 0.0231 0.485 FALSE
## 7 inwards CTCFEL_24h 0.00662 0.152 FALSE
## 8 ambiguous CTCFEL_6h 0.00630 0.151 FALSE
## 9 ambiguous CTCFEL_24h 0.00108 0.0280 TRUE
## 10 outwards WAPL_6h 0.901 1 FALSE
## 11 outwards WAPL_24h 0.847 1 FALSE
## 12 inwards WAPL_6h 0.00512 0.128 FALSE
## 13 inwards WAPL_24h 0.000775 0.0209 TRUE
## 14 ambiguous WAPL_6h 0.00783 0.172 FALSE
## 15 ambiguous WAPL_24h 0.153 1 FALSE
## 16 outwards CTCFWAPL_6h 0.111 1 FALSE
## 17 outwards CTCFWAPL_24h 0.574 1 FALSE
## 18 inwards CTCFWAPL_6h 0.207 1 FALSE
## 19 inwards CTCFWAPL_24h 0.259 1 FALSE
## 20 ambiguous CTCFWAPL_6h 0.133 1 FALSE
## 21 ambiguous CTCFWAPL_24h 0.0806 1 FALSE
## 22 outwards RAD21_6h 0.934 1 FALSE
## 23 outwards RAD21_24h 0.333 1 FALSE
## 24 inwards RAD21_6h 0.399 1 FALSE
## 25 inwards RAD21_24h 0.119 1 FALSE
## 26 ambiguous RAD21_6h 0.489 1 FALSE
## 27 ambiguous RAD21_24h 0.0868 1 FALSE
As mentioned, also plot borders with multiple CTCF sites.
# CTCF + number vs non-CTCF
tib_stat <- tibble()
PlotBordersWithConfidenceIntervals(tib_damid,
c("PT_0h"),
group = "border_ctcf_n",
ylim = c(-1.25, 0.90),
keep_PT = T)## NULL
tmp <- PlotBordersWithConfidenceIntervals(tib_damid,
c("PT_0h", "PT_24h"),
group = "border_ctcf_n",
ylim = c(-1.25, 0.90),
keep_PT = T)## # A tibble: 4 × 5
## group key pvalue padj sign
## <fct> <fct> <dbl> <dbl> <lgl>
## 1 nonCTCF PT_24h 0.154 0.307 FALSE
## 2 1 PT_24h 0.000641 0.00256 TRUE
## 3 2 PT_24h 0.0386 0.116 FALSE
## 4 >=3 PT_24h 0.625 0.625 FALSE
## # A tibble: 3 × 5
## group key pvalue padj sign
## <chr> <chr> <dbl> <dbl> <lgl>
## 1 1 PT_24h 0.119 0.358 FALSE
## 2 2 PT_24h 0.174 0.358 FALSE
## 3 >=3 PT_24h 0.807 0.807 FALSE
tib_stat <- bind_rows(tib_stat, tmp)
tmp <- PlotBordersWithConfidenceIntervals(tib_damid,
c("PT_0h", "CTCFEL_0h", "CTCFEL_6h",
"CTCFEL_24h", "CTCFEL_96h"),
group = "border_ctcf_n",
ylim = c(-1.25, 0.90))## # A tibble: 8 × 5
## group key pvalue padj sign
## <fct> <fct> <dbl> <dbl> <lgl>
## 1 nonCTCF CTCFEL_6h 0.0770 0.312 FALSE
## 2 nonCTCF CTCFEL_24h 0.0266 0.160 FALSE
## 3 1 CTCFEL_6h 0.137 0.375 FALSE
## 4 1 CTCFEL_24h 0.185 0.375 FALSE
## 5 2 CTCFEL_6h 0.00299 0.0209 TRUE
## 6 2 CTCFEL_24h 0.0000470 0.000376 TRUE
## 7 >=3 CTCFEL_6h 0.125 0.375 FALSE
## 8 >=3 CTCFEL_24h 0.0625 0.312 FALSE
## # A tibble: 6 × 5
## group key pvalue padj sign
## <chr> <chr> <dbl> <dbl> <lgl>
## 1 1 CTCFEL_6h 0.0245 0.0515 FALSE
## 2 1 CTCFEL_24h 0.0172 0.0515 FALSE
## 3 2 CTCFEL_6h 0.000208 0.00104 TRUE
## 4 2 CTCFEL_24h 0.00000271 0.0000162 TRUE
## 5 >=3 CTCFEL_6h 0.0328 0.0515 FALSE
## 6 >=3 CTCFEL_24h 0.00564 0.0226 TRUE
tib_stat <- bind_rows(tib_stat, tmp)
tmp <- PlotBordersWithConfidenceIntervals(tib_damid,
c("PT_0h", "WAPL_0h", "WAPL_6h",
"WAPL_24h", "WAPL_96h"),
group = "border_ctcf_n",
ylim = c(-1.25, 0.90))## # A tibble: 8 × 5
## group key pvalue padj sign
## <fct> <fct> <dbl> <dbl> <lgl>
## 1 nonCTCF WAPL_6h 0.00563 0.0225 TRUE
## 2 nonCTCF WAPL_24h 0.00000217 0.0000152 TRUE
## 3 1 WAPL_6h 0.00000926 0.0000556 TRUE
## 4 1 WAPL_24h 0.000000743 0.00000594 TRUE
## 5 2 WAPL_6h 0.0129 0.0388 TRUE
## 6 2 WAPL_24h 0.000607 0.00304 TRUE
## 7 >=3 WAPL_6h 0.0625 0.125 FALSE
## 8 >=3 WAPL_24h 0.188 0.188 FALSE
## # A tibble: 6 × 5
## group key pvalue padj sign
## <chr> <chr> <dbl> <dbl> <lgl>
## 1 1 WAPL_6h 0.0148 0.0886 FALSE
## 2 1 WAPL_24h 0.0541 0.162 FALSE
## 3 2 WAPL_6h 0.0740 0.162 FALSE
## 4 2 WAPL_24h 0.0215 0.107 FALSE
## 5 >=3 WAPL_6h 0.0253 0.107 FALSE
## 6 >=3 WAPL_24h 0.369 0.369 FALSE
tib_stat <- bind_rows(tib_stat, tmp)
tmp <- PlotBordersWithConfidenceIntervals(tib_damid,
c("PT_0h", "CTCFWAPL_0h", "CTCFWAPL_6h",
"CTCFWAPL_24h", "CTCFWAPL_96h"),
group = "border_ctcf_n",
ylim = c(-1.25, 0.90))## # A tibble: 8 × 5
## group key pvalue padj sign
## <fct> <fct> <dbl> <dbl> <lgl>
## 1 nonCTCF CTCFWAPL_6h 0.506 1 FALSE
## 2 nonCTCF CTCFWAPL_24h 0.988 1 FALSE
## 3 1 CTCFWAPL_6h 0.0585 0.351 FALSE
## 4 1 CTCFWAPL_24h 0.256 1 FALSE
## 5 2 CTCFWAPL_6h 0.00479 0.0383 TRUE
## 6 2 CTCFWAPL_24h 0.0179 0.125 FALSE
## 7 >=3 CTCFWAPL_6h 0.188 0.938 FALSE
## 8 >=3 CTCFWAPL_24h 0.625 1 FALSE
## # A tibble: 6 × 5
## group key pvalue padj sign
## <chr> <chr> <dbl> <dbl> <lgl>
## 1 1 CTCFWAPL_6h 0.282 0.903 FALSE
## 2 1 CTCFWAPL_24h 0.336 0.903 FALSE
## 3 2 CTCFWAPL_6h 0.00274 0.0164 TRUE
## 4 2 CTCFWAPL_24h 0.0105 0.0523 FALSE
## 5 >=3 CTCFWAPL_6h 0.226 0.903 FALSE
## 6 >=3 CTCFWAPL_24h 0.443 0.903 FALSE
tib_stat <- bind_rows(tib_stat, tmp)
tmp <- PlotBordersWithConfidenceIntervals(tib_damid,
c("PT_0h", "RAD21_0h", "RAD21_6h",
"RAD21_24h"),
group = "border_ctcf_n",
ylim = c(-1.25, 0.90))## # A tibble: 8 × 5
## group key pvalue padj sign
## <fct> <fct> <dbl> <dbl> <lgl>
## 1 nonCTCF RAD21_6h 0.000121 0.000847 TRUE
## 2 nonCTCF RAD21_24h 0.0000471 0.000377 TRUE
## 3 1 RAD21_6h 0.00132 0.00792 TRUE
## 4 1 RAD21_24h 0.171 0.856 FALSE
## 5 2 RAD21_6h 0.670 1 FALSE
## 6 2 RAD21_24h 0.815 1 FALSE
## 7 >=3 RAD21_6h 0.812 1 FALSE
## 8 >=3 RAD21_24h 1 1 FALSE
## # A tibble: 6 × 5
## group key pvalue padj sign
## <chr> <chr> <dbl> <dbl> <lgl>
## 1 1 RAD21_6h 0.569 1 FALSE
## 2 1 RAD21_24h 0.0781 0.469 FALSE
## 3 2 RAD21_6h 0.341 1 FALSE
## 4 2 RAD21_24h 0.117 0.584 FALSE
## 5 >=3 RAD21_6h 0.763 1 FALSE
## 6 >=3 RAD21_24h 0.434 1 FALSE
tib_stat <- bind_rows(tib_stat, tmp)
tib_stat %>%
mutate(padj = p.adjust(pvalue),
sign = padj < 0.05) %>%
print(n = 40)## # A tibble: 27 × 5
## group key pvalue padj sign
## <chr> <chr> <dbl> <dbl> <lgl>
## 1 1 PT_24h 0.119 1 FALSE
## 2 2 PT_24h 0.174 1 FALSE
## 3 >=3 PT_24h 0.807 1 FALSE
## 4 1 CTCFEL_6h 0.0245 0.466 FALSE
## 5 1 CTCFEL_24h 0.0172 0.360 FALSE
## 6 2 CTCFEL_6h 0.000208 0.00541 TRUE
## 7 2 CTCFEL_24h 0.00000271 0.0000730 TRUE
## 8 >=3 CTCFEL_6h 0.0328 0.557 FALSE
## 9 >=3 CTCFEL_24h 0.00564 0.135 FALSE
## 10 1 WAPL_6h 0.0148 0.325 FALSE
## 11 1 WAPL_24h 0.0541 0.865 FALSE
## 12 2 WAPL_6h 0.0740 1 FALSE
## 13 2 WAPL_24h 0.0215 0.430 FALSE
## 14 >=3 WAPL_6h 0.0253 0.466 FALSE
## 15 >=3 WAPL_24h 0.369 1 FALSE
## 16 1 CTCFWAPL_6h 0.282 1 FALSE
## 17 1 CTCFWAPL_24h 0.336 1 FALSE
## 18 2 CTCFWAPL_6h 0.00274 0.0685 FALSE
## 19 2 CTCFWAPL_24h 0.0105 0.241 FALSE
## 20 >=3 CTCFWAPL_6h 0.226 1 FALSE
## 21 >=3 CTCFWAPL_24h 0.443 1 FALSE
## 22 1 RAD21_6h 0.569 1 FALSE
## 23 1 RAD21_24h 0.0781 1 FALSE
## 24 2 RAD21_6h 0.341 1 FALSE
## 25 2 RAD21_24h 0.117 1 FALSE
## 26 >=3 RAD21_6h 0.763 1 FALSE
## 27 >=3 RAD21_24h 0.434 1 FALSE
Good.
All relevant data is saved from the matching document looking at borders without
active genes.
#
# saveRDS(metadata.damid,
# file.path(output.dir, "metadata_damid.rds"))
# saveRDS(damid,
# file.path(output.dir, "damid.rds"))
# saveRDS(bin.size,
# file.path(output.dir, "bin_size.rds"))
#
# # Also, as tsv file
# tib_damid <- as_tibble(damid) %>%
# dplyr::select(-width, -strand) %>%
# dplyr::select(-contains("CTCF_"), -contains("CTCFNQ_"))
# print(names(tib_damid))
#
# tib_damid <- tib_damid %>%
# rename_all(function(x) str_replace(x, "CTCFEL", "CTCF"))
# print(names(tib_damid))
#
# write_tsv(tib_damid,
# file.path(output.dir, "damid_average_replicates.tsv"))
#
# # Also, for replicates individually
# damid_replicates <- as_tibble(damid_replicates) %>%
# dplyr::select(-width, -strand,
# -contains("NQ"))
# print(names(damid_replicates))
#
# damid_replicates <- damid_replicates %>%
# rename_all(~ c("seqnames", "start", "end",
# "PT_0h_r1", "PT_0h_r2",
# "PT_24h_r1", "PT_24h_r2",
# "CTCF_0h_r1", "CTCF_0h_r2",
# "CTCF_6h_r1", "CTCF_6h_r2",
# "CTCF_24h_r1", "CTCF_24h_r2",
# "CTCF_96h_r1", "CTCF_96h_r2",
# "WAPL_0h_r1", "WAPL_0h_r2", "WAPL_0h_r3",
# "WAPL_6h_r1", "WAPL_6h_r2", "WAPL_6h_r3",
# "WAPL_24h_r1", "WAPL_24h_r2", "WAPL_24h_r3",
# "WAPL_96h_r1", "WAPL_96h_r2", "WAPL_96h_r3",
# "CTCFWAPL_0h_r1", "CTCFWAPL_0h_r2",
# "CTCFWAPL_0h_r3", "CTCFWAPL_0h_r4",
# "CTCFWAPL_6h_r1", "CTCFWAPL_6h_r2",
# "CTCFWAPL_6h_r3", "CTCFWAPL_6h_r4",
# "CTCFWAPL_24h_r1",
# "CTCFWAPL_24h_r3", "CTCFWAPL_24h_r4",
# "CTCFWAPL_96h_r1", "CTCFWAPL_96h_r2",
# "CTCFWAPL_96h_r3",
# "RAD21_0h_r1", "RAD21_0h_r2",
# "RAD21_6h_r1", "RAD21_6h_r2",
# "RAD21_24h_r1", "RAD21_24h_r2"))
# print(names(damid_replicates))
#
# write_tsv(damid_replicates,
# file.path(output.dir, "damid_individual_replicates.tsv"))We generally observe the same effects at border with active genes. The effect size seems a bit smaller, but it’s difficult to tell.
sessionInfo()## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] knitr_1.33 corrr_0.4.3 broom_0.7.9
## [4] ggbeeswarm_0.6.0 M3C_1.12.0 yaml_2.2.1
## [7] caTools_1.18.2 rtracklayer_1.50.0 GenomicRanges_1.42.0
## [10] GenomeInfoDb_1.26.7 IRanges_2.24.1 S4Vectors_0.28.1
## [13] BiocGenerics_0.36.1 forcats_0.5.1 stringr_1.4.0
## [16] dplyr_1.0.7 purrr_0.3.4 readr_2.0.0
## [19] tidyr_1.1.3 tibble_3.1.6 ggplot2_3.3.5
## [22] tidyverse_1.3.1
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-7 matrixStats_0.60.0
## [3] fs_1.5.0 lubridate_1.7.10
## [5] RColorBrewer_1.1-2 doParallel_1.0.16
## [7] httr_1.4.2 tools_4.0.5
## [9] backports_1.2.1 bslib_0.2.5.1
## [11] utf8_1.2.2 R6_2.5.1
## [13] vipor_0.4.5 DBI_1.1.1
## [15] colorspace_2.0-2 withr_2.4.2
## [17] tidyselect_1.1.1 compiler_4.0.5
## [19] cli_3.1.0 rvest_1.0.1
## [21] Biobase_2.50.0 xml2_1.3.2
## [23] DelayedArray_0.16.3 labeling_0.4.2
## [25] sass_0.4.0 scales_1.1.1
## [27] askpass_1.1 digest_0.6.28
## [29] Rsamtools_2.6.0 rmarkdown_2.11
## [31] XVector_0.30.0 pkgconfig_2.0.3
## [33] htmltools_0.5.1.1 umap_0.2.7.0
## [35] MatrixGenerics_1.2.1 highr_0.9
## [37] dbplyr_2.1.1 rlang_0.4.12
## [39] readxl_1.3.1 rstudioapi_0.13
## [41] farver_2.1.0 jquerylib_0.1.4
## [43] generics_0.1.0 jsonlite_1.7.2
## [45] BiocParallel_1.24.1 RCurl_1.98-1.3
## [47] magrittr_2.0.1 GenomeInfoDbData_1.2.4
## [49] Matrix_1.3-4 Rcpp_1.0.7
## [51] munsell_0.5.0 fansi_0.5.0
## [53] reticulate_1.20 lifecycle_1.0.1
## [55] stringi_1.7.3 SummarizedExperiment_1.20.0
## [57] zlibbioc_1.36.0 Rtsne_0.15
## [59] matrixcalc_1.0-5 grid_4.0.5
## [61] doSNOW_1.0.19 crayon_1.4.2
## [63] lattice_0.20-44 Biostrings_2.58.0
## [65] haven_2.4.1 hms_1.1.0
## [67] pillar_1.6.4 corpcor_1.6.9
## [69] codetools_0.2-18 reprex_2.0.0
## [71] XML_3.99-0.6 glue_1.5.0
## [73] evaluate_0.14 modelr_0.1.8
## [75] png_0.1-7 vctrs_0.3.8
## [77] tzdb_0.1.2 foreach_1.5.1
## [79] cellranger_1.1.0 openssl_1.4.4
## [81] gtable_0.3.0 assertthat_0.2.1
## [83] xfun_0.24 RSpectra_0.16-0
## [85] snow_0.4-3 iterators_1.0.13
## [87] beeswarm_0.4.0 GenomicAlignments_1.26.0
## [89] cluster_2.1.2 ellipsis_0.3.2